library(dplyr) # Data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2) # Data reshaping for ggplot
library(ggplot2) # Data visualization
library(readxl) # Excel file I/O, e.g. the read_excel function
library(zipcode)
library(maps)
library(ggmap)
library(shiny)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the raw speed-dating export. Empty cells, "." and the literal "NA"
# are all read as missing values.
Speed_dating <- read.csv(
  "~/Desktop/washam2/Spooky Stats/SpeedDating.csv",
  na.strings = c("", ".", "NA")
)

# Columns kept for the analysis: participant id, three demographic fields,
# field of study, and the six *1_1 attribute scores.
variables <- c(
  "iid", "gender", "age", "income", "field",
  "attr1_1", "sinc1_1", "intel1_1", "fun1_1", "amb1_1", "shar1_1"
)
Speed_dating_edit <- Speed_dating[, variables, drop = FALSE]

# Keep only the first row seen for each iid, so that each participant
# appears once.
Speed_dating_combined <- Speed_dating_edit[
  !duplicated(Speed_dating_edit[["iid"]]),
]

# Drop every participant with a missing value in any of the kept columns.
Speed_dating_clean <- na.omit(Speed_dating_combined)
Find the average of the six attribute variables
# Mean of each of the six *1_1 attribute columns across the cleaned
# participants, one row per attribute. The labels are listed in the same
# order as the columns they describe.
# Fix: the "Intelligence" label previously carried a trailing space, which
# ended up in the data frame and on the plot axis.
Speed_dating_average <- data.frame(
  name = c(
    "Attraction", "Sincerity", "Intelligence", "Fun", "Ambition", "Share"
  ),
  # unname() keeps the default 1..6 row names; a named vector would
  # otherwise supply the column names as row names.
  mean = unname(vapply(
    Speed_dating_clean[
      c("attr1_1", "sinc1_1", "intel1_1", "fun1_1", "amb1_1", "shar1_1")
    ],
    mean,
    numeric(1)
  ))
)

# Bar chart of the mean score per attribute. stat = "identity" plots the
# precomputed means instead of counting rows.
ggplot(Speed_dating_average, aes(x = name, y = mean)) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("Mean Score")

Find each participant's percentage of matches and divide the participants into two groups (high and low)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(dplyr)
# n = number of rows recorded for each participant (iid).
Speed_dating_count <- Speed_dating %>%
  group_by(iid) %>%
  tally()

# match = sum of the `match` column per participant. Note that the formula
# interface of aggregate() omits rows with a missing `match` by default.
Speed_dating_sum <- aggregate(match ~ iid, data = Speed_dating, FUN = sum)

# One row per participant with the match rate as a percentage of their rows.
# Participants below 50% are labelled "Low Match", all others "High Match".
Speed_dating_table <- merge(Speed_dating_sum, Speed_dating_count, by = "iid") %>%
  mutate(
    percent = (match / n) * 100,
    group = ifelse(percent < 50, "Low Match", "High Match")
  )

# Attach the cleaned per-participant attributes. merge() keeps only the iids
# present in both tables.
Speed_dating_final <- merge(
  x = Speed_dating_table,
  y = Speed_dating_clean,
  by = "iid"
)
Speed_dating_final
## iid match n percent group gender age income
## 1 1 4 10 40.000000 Low Match 0 21 69,487.00
## 2 2 2 10 20.000000 Low Match 0 24 65,929.00
## 3 4 2 10 20.000000 Low Match 0 23 37,754.00
## 4 5 2 10 20.000000 Low Match 0 21 86,340.00
## 5 6 2 10 20.000000 Low Match 0 23 60,304.00
## 6 7 2 10 20.000000 Low Match 0 22 54,620.00
## 7 10 2 10 20.000000 Low Match 0 26 48,652.00
## 8 12 2 10 20.000000 Low Match 1 22 29,237.00
## 9 14 8 10 80.000000 High Match 1 23 56,580.00
## 10 15 3 10 30.000000 Low Match 1 24 36,782.00
## 11 17 1 10 10.000000 Low Match 1 30 38,548.00
## 12 18 1 10 10.000000 Low Match 1 27 52,010.00
## 13 22 2 16 12.500000 Low Match 0 27 28,418.00
## 14 24 0 16 0.000000 Low Match 0 27 43,185.00
## 15 25 0 16 0.000000 Low Match 0 25 23,152.00
## 16 26 0 16 0.000000 Low Match 0 23 43,664.00
## 17 27 2 16 12.500000 Low Match 0 22 48,441.00
## 18 29 2 16 12.500000 Low Match 0 22 61,152.00
## 19 31 4 16 25.000000 Low Match 0 24 36,485.00
## 20 33 0 16 0.000000 Low Match 0 30 41,507.00
## 21 35 5 16 31.250000 Low Match 0 25 17,134.00
## 22 37 2 16 12.500000 Low Match 0 27 30,038.00
## 23 38 3 16 18.750000 Low Match 0 23 33,772.00
## 24 39 1 16 6.250000 Low Match 0 24 24,997.00
## 25 40 0 19 0.000000 Low Match 1 25 42,096.00
## 26 42 0 19 0.000000 Low Match 1 23 28,891.00
## 27 49 6 19 31.578947 Low Match 1 25 62,635.00
## 28 50 4 19 21.052632 Low Match 1 27 12,063.00
## 29 51 1 19 5.263158 Low Match 1 23 29,809.00
## 30 52 1 19 5.263158 Low Match 1 21 26,482.00
## 31 54 0 19 0.000000 Low Match 1 28 30,147.00
## 32 55 3 19 15.789474 Low Match 1 23 39,919.00
## 33 56 2 10 20.000000 Low Match 0 23 41,466.00
## 34 61 2 10 20.000000 Low Match 0 28 23,988.00
## 35 62 3 10 30.000000 Low Match 0 26 28,989.00
## 36 63 1 10 10.000000 Low Match 0 24 50,948.00
## 37 64 1 10 10.000000 Low Match 0 27 38,022.00
## 38 67 2 10 20.000000 Low Match 1 22 47,559.00
## 39 68 0 10 0.000000 Low Match 1 27 53,539.00
## 40 70 1 10 10.000000 Low Match 1 26 32,159.00
## 41 71 3 10 30.000000 Low Match 1 21 53,940.00
## 42 75 2 10 20.000000 Low Match 1 24 40,753.00
## 43 76 5 18 27.777778 Low Match 0 34 38,207.00
## 44 77 6 18 33.333333 Low Match 0 22 46,166.00
## 45 78 3 18 16.666667 Low Match 0 23 30,973.00
## 46 81 3 18 16.666667 Low Match 0 28 28,317.00
## 47 82 5 18 27.777778 Low Match 0 25 26,645.00
## 48 85 1 18 5.555556 Low Match 0 30 25,589.00
## 49 86 8 18 44.444444 Low Match 0 22 55,223.00
## 50 88 0 18 0.000000 Low Match 0 23 109,031.00
## 51 91 8 18 44.444444 Low Match 0 25 40,409.00
## 52 92 7 18 38.888889 Low Match 0 29 21,597.00
## 53 95 1 18 5.555556 Low Match 1 26 76,624.00
## 54 97 5 18 27.777778 Low Match 1 29 35,968.00
## 55 98 2 18 11.111111 Low Match 1 39 51,725.00
## 56 99 8 18 44.444444 Low Match 1 28 55,419.00
## 57 103 1 18 5.555556 Low Match 1 26 55,550.00
## 58 104 5 18 27.777778 Low Match 1 27 26,682.00
## 59 105 8 18 44.444444 Low Match 1 25 41,547.00
## 60 106 2 18 11.111111 Low Match 1 22 23,361.00
## 61 107 11 18 61.111111 High Match 1 21 74,893.00
## 62 110 4 18 22.222222 Low Match 1 28 52,804.00
## 63 113 3 10 30.000000 Low Match 0 21 53,923.00
## 64 115 3 10 30.000000 Low Match 0 21 27,094.00
## 65 117 1 10 10.000000 Low Match 0 21 57,213.00
## 66 120 4 10 40.000000 Low Match 0 20 42,390.00
## 67 121 0 10 0.000000 Low Match 0 21 43,636.00
## 68 122 3 10 30.000000 Low Match 1 22 57,887.00
## 69 123 0 10 0.000000 Low Match 1 18 30,768.00
## 70 124 0 10 0.000000 Low Match 1 22 66,699.00
## 71 125 7 10 70.000000 High Match 1 21 45,360.00
## 72 126 2 10 20.000000 Low Match 1 22 55,080.00
## 73 131 0 10 0.000000 Low Match 1 19 17,378.00
## 74 132 1 5 20.000000 Low Match 0 27 40,375.00
## 75 134 1 5 20.000000 Low Match 0 23 48,929.00
## 76 137 1 5 20.000000 Low Match 1 26 78,193.00
## 77 138 1 5 20.000000 Low Match 1 32 63,351.00
## 78 142 7 16 43.750000 Low Match 0 24 50,745.00
## 79 144 1 16 6.250000 Low Match 0 23 29,279.00
## 80 145 0 16 0.000000 Low Match 0 24 38,774.00
## 81 146 1 16 6.250000 Low Match 0 36 58,802.00
## 82 147 5 16 31.250000 Low Match 0 26 41,831.00
## 83 149 5 16 31.250000 Low Match 0 28 52,186.00
## 84 150 2 16 12.500000 Low Match 0 31 55,080.00
## 85 154 7 16 43.750000 Low Match 0 27 97,857.00
## 86 155 1 16 6.250000 Low Match 0 27 74,624.00
## 87 156 4 16 25.000000 Low Match 0 30 21,590.00
## 88 157 1 16 6.250000 Low Match 0 28 38,832.00
## 89 158 0 16 0.000000 Low Match 1 23 37,248.00
## 90 160 3 16 18.750000 Low Match 1 29 28,240.00
## 91 161 2 16 12.500000 Low Match 1 22 53,771.00
## 92 163 3 16 18.750000 Low Match 1 26 56,096.00
## 93 164 2 16 12.500000 Low Match 1 27 31,560.00
## 94 165 3 16 18.750000 Low Match 1 30 52,467.00
## 95 166 3 16 18.750000 Low Match 1 26 80,006.00
## 96 167 3 16 18.750000 Low Match 1 23 47,572.00
## 97 168 1 16 6.250000 Low Match 1 23 22,439.00
## 98 169 5 16 31.250000 Low Match 1 28 31,383.00
## 99 174 2 10 20.000000 Low Match 0 29 58,802.00
## 100 175 3 10 30.000000 Low Match 0 23 40,749.00
## 101 177 0 10 0.000000 Low Match 0 22 47,997.00
## 102 179 1 10 10.000000 Low Match 0 30 78,704.00
## 103 181 5 10 50.000000 High Match 0 22 31,143.00
## 104 184 4 10 40.000000 Low Match 1 34 32,129.00
## 105 186 3 10 30.000000 Low Match 1 24 44,195.00
## 106 188 1 10 10.000000 Low Match 1 21 46,837.00
## 107 192 1 10 10.000000 Low Match 1 24 97,972.00
## 108 193 2 10 20.000000 Low Match 1 25 35,960.00
## 109 194 1 20 5.000000 Low Match 0 24 65,708.00
## 110 195 5 20 25.000000 Low Match 0 27 49,466.00
## 111 197 3 20 15.000000 Low Match 0 24 53,229.00
## 112 199 5 20 25.000000 Low Match 0 30 32,649.00
## 113 200 3 20 15.000000 Low Match 0 25 35,867.00
## 114 202 2 20 10.000000 Low Match 0 22 40,244.00
## 115 203 0 20 0.000000 Low Match 0 22 42,640.00
## 116 204 0 20 0.000000 Low Match 0 26 52,388.00
## 117 208 11 20 55.000000 High Match 0 25 62,875.00
## 118 209 0 20 0.000000 Low Match 0 28 30,855.00
## 119 210 3 20 15.000000 Low Match 0 29 46,800.00
## 120 212 9 20 45.000000 Low Match 0 27 45,695.00
## 121 214 2 20 10.000000 Low Match 1 23 46,792.00
## 122 215 5 20 25.000000 Low Match 1 26 53,501.00
## 123 216 0 20 0.000000 Low Match 1 27 55,080.00
## 124 217 8 20 40.000000 Low Match 1 26 64,716.00
## 125 223 1 20 5.000000 Low Match 1 28 27,248.00
## 126 226 4 20 20.000000 Low Match 1 23 22,805.00
## 127 227 3 20 15.000000 Low Match 1 26 56,118.00
## 128 229 4 20 20.000000 Low Match 1 28 30,146.00
## 129 230 4 20 20.000000 Low Match 1 42 39,123.00
## 130 231 3 20 15.000000 Low Match 1 30 46,153.00
## 131 232 1 20 5.000000 Low Match 1 31 45,300.00
## 132 233 5 20 25.000000 Low Match 1 29 42,397.00
## 133 234 0 9 0.000000 Low Match 0 22 44,346.00
## 134 236 0 9 0.000000 Low Match 0 24 42,225.00
## 135 237 1 9 11.111111 Low Match 0 34 37,405.00
## 136 238 2 9 22.222222 Low Match 0 27 28,524.00
## 137 240 1 9 11.111111 Low Match 0 26 61,141.00
## 138 241 3 9 33.333333 Low Match 0 29 8,607.00
## 139 246 1 9 11.111111 Low Match 1 25 41,476.00
## 140 248 3 9 33.333333 Low Match 1 27 49,841.00
## 141 254 0 21 0.000000 Low Match 0 22 37,240.00
## 142 255 0 21 0.000000 Low Match 0 26 36,594.00
## 143 256 3 21 14.285714 Low Match 0 24 62,997.00
## 144 257 0 21 0.000000 Low Match 0 24 46,608.00
## 145 258 7 21 33.333333 Low Match 0 25 37,881.00
## 146 259 1 21 4.761905 Low Match 0 25 48,944.00
## 147 261 5 21 23.809524 Low Match 0 26 77,112.00
## 148 263 6 21 28.571429 Low Match 0 26 18,283.00
## 149 264 4 21 19.047619 Low Match 0 26 31,432.00
## 150 265 1 21 4.761905 Low Match 0 32 73,073.00
## 151 268 10 21 47.619048 Low Match 0 24 26,706.00
## 152 271 2 21 9.523810 Low Match 0 23 50,060.00
## 153 272 0 21 0.000000 Low Match 0 25 25,401.00
## 154 276 5 21 23.809524 Low Match 1 26 80,608.00
## 155 279 5 21 23.809524 Low Match 1 25 43,844.00
## 156 288 4 21 19.047619 Low Match 1 27 53,196.00
## 157 289 5 21 23.809524 Low Match 1 28 53,229.00
## 158 292 4 21 19.047619 Low Match 1 28 25,786.00
## 159 295 0 14 0.000000 Low Match 0 30 39,394.00
## 160 296 3 14 21.428571 Low Match 0 28 40,695.00
## 161 297 4 14 28.571429 Low Match 0 21 45,788.00
## 162 298 0 14 0.000000 Low Match 0 22 37,315.00
## 163 299 1 14 7.142857 Low Match 0 24 51,663.00
## 164 300 2 14 14.285714 Low Match 0 35 32,563.00
## 165 302 0 14 0.000000 Low Match 0 21 54,303.00
## 166 303 1 14 7.142857 Low Match 0 33 16,908.00
## 167 304 1 14 7.142857 Low Match 0 28 39,729.00
## 168 307 3 14 21.428571 Low Match 0 27 57,316.00
## 169 309 2 14 14.285714 Low Match 1 32 30,587.00
## 170 310 1 14 7.142857 Low Match 1 25 55,080.00
## 171 311 1 14 7.142857 Low Match 1 29 57,513.00
## 172 312 1 14 7.142857 Low Match 1 23 33,772.00
## 173 316 6 14 42.857143 Low Match 1 25 31,857.00
## 174 320 0 14 0.000000 Low Match 1 36 23,207.00
## 175 343 2 18 11.111111 Low Match 0 32 25,831.00
## 176 344 4 18 22.222222 Low Match 0 26 28,759.00
## 177 345 4 18 22.222222 Low Match 0 24 25,401.00
## 178 347 0 18 0.000000 Low Match 0 26 19,264.00
## 179 348 1 18 5.555556 Low Match 0 21 41,778.00
## 180 349 8 18 44.444444 Low Match 0 33 35,963.00
## 181 354 8 18 44.444444 Low Match 0 22 49,409.00
## 182 356 1 18 5.555556 Low Match 0 27 31,516.00
## 183 357 3 18 16.666667 Low Match 0 27 36,223.00
## 184 358 2 18 11.111111 Low Match 0 26 43,367.00
## 185 359 3 18 16.666667 Low Match 0 26 27,503.00
## 186 361 1 20 5.000000 Low Match 1 34 35,187.00
## 187 363 2 20 10.000000 Low Match 1 23 26,298.00
## 188 369 8 20 40.000000 Low Match 1 25 31,148.00
## 189 370 1 20 5.000000 Low Match 1 28 55,704.00
## 190 371 4 20 20.000000 Low Match 1 27 69,487.00
## 191 372 3 20 15.000000 Low Match 1 27 46,138.00
## 192 376 2 20 10.000000 Low Match 1 25 66,827.00
## 193 378 1 20 5.000000 Low Match 1 29 42,897.00
## 194 379 2 19 10.526316 Low Match 0 38 31,809.00
## 195 382 4 19 21.052632 Low Match 0 28 52,010.00
## 196 385 7 19 36.842105 Low Match 0 25 78,704.00
## 197 386 2 19 10.526316 Low Match 0 26 75,347.00
## 198 388 1 19 5.263158 Low Match 0 24 47,005.00
## 199 389 8 19 42.105263 Low Match 0 21 52,805.00
## 200 391 1 19 5.263158 Low Match 0 28 50,725.00
## 201 392 2 19 10.526316 Low Match 0 26 49,409.00
## 202 393 5 19 26.315789 Low Match 0 22 65,693.00
## 203 394 1 19 5.263158 Low Match 0 24 45,736.00
## 204 395 2 19 10.526316 Low Match 0 33 33,906.00
## 205 396 2 19 10.526316 Low Match 0 25 50,501.00
## 206 399 1 18 5.555556 Low Match 1 29 48,785.00
## 207 405 0 18 0.000000 Low Match 1 30 55,080.00
## 208 410 3 18 16.666667 Low Match 1 27 55,080.00
## 209 411 1 18 5.555556 Low Match 1 34 80,006.00
## 210 414 8 18 44.444444 Low Match 1 29 55,080.00
## 211 415 1 18 5.555556 Low Match 1 31 52,318.00
## 212 416 6 8 75.000000 High Match 0 25 62,844.00
## 213 418 0 8 0.000000 Low Match 0 25 52,586.00
## 214 419 1 8 12.500000 Low Match 0 28 29,236.00
## 215 420 2 8 25.000000 Low Match 0 22 31,486.00
## 216 421 2 8 25.000000 Low Match 0 25 78,704.00
## 217 422 4 6 66.666667 High Match 1 26 31,632.00
## 218 425 0 6 0.000000 Low Match 1 28 106,663.00
## 219 427 0 6 0.000000 Low Match 1 28 84,043.00
## 220 428 2 6 33.333333 Low Match 1 27 35,224.00
## 221 429 1 6 16.666667 Low Match 1 27 36,381.00
## 222 430 0 14 0.000000 Low Match 0 22 65,498.00
## 223 432 6 14 42.857143 Low Match 0 28 60,000.00
## 224 433 2 14 14.285714 Low Match 0 30 22,669.00
## 225 434 1 14 7.142857 Low Match 0 27 81,266.00
## 226 437 3 14 21.428571 Low Match 0 23 29,746.00
## 227 438 2 14 14.285714 Low Match 0 23 47,556.00
## 228 439 3 14 21.428571 Low Match 0 29 42,651.00
## 229 443 0 10 0.000000 Low Match 1 27 27,794.00
## 230 444 0 10 0.000000 Low Match 1 26 41,737.00
## 231 445 2 10 20.000000 Low Match 1 30 90,225.00
## 232 448 5 10 50.000000 High Match 1 22 55,080.00
## 233 449 2 10 20.000000 Low Match 1 23 52,280.00
## 234 454 0 6 0.000000 Low Match 0 27 56,056.00
## 235 457 0 6 0.000000 Low Match 0 55 60,835.00
## 236 459 0 6 0.000000 Low Match 0 26 62,829.00
## 237 460 1 6 16.666667 Low Match 1 33 30,038.00
## 238 465 0 6 0.000000 Low Match 1 23 16,767.00
## 239 466 0 15 0.000000 Low Match 0 24 42,967.00
## 240 467 7 15 46.666667 Low Match 0 24 21,488.00
## 241 468 3 15 20.000000 Low Match 0 25 89,977.00
## 242 470 5 15 33.333333 Low Match 0 29 18,619.00
## 243 474 4 15 26.666667 Low Match 0 30 22,161.00
## 244 475 2 15 13.333333 Low Match 0 27 82,734.00
## 245 476 2 15 13.333333 Low Match 0 25 40,163.00
## 246 479 0 15 0.000000 Low Match 0 27 46,185.00
## 247 481 1 15 6.666667 Low Match 1 23 78,844.00
## 248 482 4 15 26.666667 Low Match 1 30 29,575.00
## 249 484 2 15 13.333333 Low Match 1 31 34,752.00
## 250 492 7 15 46.666667 Low Match 1 30 22,173.00
## 251 494 2 15 13.333333 Low Match 1 30 37,994.00
## 252 495 2 15 13.333333 Low Match 1 32 35,409.00
## 253 496 1 7 14.285714 Low Match 0 23 23,707.00
## 254 498 0 7 0.000000 Low Match 0 25 33,772.00
## 255 499 2 7 28.571429 Low Match 0 26 57,501.00
## 256 500 1 7 14.285714 Low Match 0 25 25,314.00
## 257 501 1 7 14.285714 Low Match 0 26 48,876.00
## 258 503 0 6 0.000000 Low Match 1 25 34,870.00
## 259 504 2 6 33.333333 Low Match 1 23 35,848.00
## 260 506 0 6 0.000000 Low Match 1 24 45,017.00
## 261 507 1 6 16.666667 Low Match 1 24 12,416.00
## 262 508 1 6 16.666667 Low Match 1 36 87,789.00
## 263 513 6 22 27.272727 Low Match 0 23 50,572.00
## 264 514 0 22 0.000000 Low Match 0 23 49,642.00
## 265 515 2 22 9.090909 Low Match 0 23 20,000.00
## 266 516 6 22 27.272727 Low Match 0 27 32,508.00
## 267 518 6 22 27.272727 Low Match 0 23 35,627.00
## 268 520 0 22 0.000000 Low Match 0 28 46,280.00
## 269 521 7 22 31.818182 Low Match 0 26 41,191.00
## 270 522 5 22 22.727273 Low Match 0 22 71,787.00
## 271 524 14 22 63.636364 High Match 0 25 72,412.00
## 272 525 0 22 0.000000 Low Match 0 24 36,510.00
## 273 527 0 22 0.000000 Low Match 0 24 32,386.00
## 274 532 3 22 13.636364 Low Match 1 26 46,272.00
## 275 535 8 22 36.363636 Low Match 1 23 48,137.00
## 276 537 2 22 9.090909 Low Match 1 22 61,686.00
## 277 543 0 22 0.000000 Low Match 1 24 47,624.00
## 278 544 3 22 13.636364 Low Match 1 23 36,673.00
## 279 545 2 22 9.090909 Low Match 1 24 16,767.00
## 280 551 2 22 9.090909 Low Match 1 27 55,138.00
## field attr1_1
## 1 Law 15.00
## 2 law 45.00
## 3 Law 20.00
## 4 Law 20.00
## 5 law 10.00
## 6 Law 15.00
## 7 Masters of Social Work&Education 15.00
## 8 Law 60.00
## 9 Law 30.00
## 10 Law 30.00
## 11 political science 35.00
## 12 Business 33.33
## 13 TC (Health Ed) 20.00
## 14 social work 10.00
## 15 Social Work 20.00
## 16 Social Work 20.00
## 17 Speech Language Pathology 17.00
## 18 Social Work 20.00
## 19 law 35.00
## 20 Educational Psychology 30.00
## 21 Mathematics 20.00
## 22 Organizational Psychology 10.00
## 23 Law 50.00
## 24 Law 20.00
## 25 Operations Research 25.00
## 26 Mechanical Engineering 20.00
## 27 Mathematical Finance 20.00
## 28 Law 35.00
## 29 Law 25.00
## 30 Law 15.00
## 31 MBA 40.00
## 32 LAW 20.00
## 33 social work 17.00
## 34 German Literature 15.00
## 35 law 30.00
## 36 Law 5.00
## 37 Social Work 10.00
## 38 Law 20.00
## 39 MFA Creative Writing 21.00
## 40 MBA 25.00
## 41 Law 60.00
## 42 law 20.00
## 43 Classics 10.00
## 44 Social Work 15.00
## 45 Social Work 10.00
## 46 Journalism 10.00
## 47 Elementary/Childhood Education (MA) 20.00
## 48 Masters of Social Work 20.00
## 49 Communications 15.00
## 50 Social Work 15.00
## 51 Law 15.00
## 52 International Educational Development 10.00
## 53 Business 18.00
## 54 Climate-Earth and Environ. Science 50.00
## 55 Social Work 40.00
## 56 Law 30.00
## 57 Business- MBA 40.00
## 58 chemistry 20.00
## 59 law 30.00
## 60 Law 10.00
## 61 Religion 19.00
## 62 Film 20.00
## 63 Economics 15.00
## 64 psychology 20.00
## 65 Economics, Sociology 20.00
## 66 English 10.00
## 67 psychology and english 35.00
## 68 Law 40.00
## 69 Biomedical Engineering 50.00
## 70 Economics and Political Science 40.00
## 71 Art History/medicine 20.00
## 72 philosophy 25.00
## 73 Computer Science 25.00
## 74 Finance 16.67
## 75 marine geophysics 6.67
## 76 Business 17.39
## 77 MBA 20.00
## 78 political science 14.29
## 79 Nutrition/Genetics 16.00
## 80 Neuroscience 11.36
## 81 Comparative Literature 18.18
## 82 Business 18.00
## 83 International Relations 8.51
## 84 History of Religion 12.00
## 85 MBA 15.38
## 86 business 15.09
## 87 Business 14.00
## 88 Business, marketing 14.29
## 89 physics (astrophysics) 19.05
## 90 Business/ Finance/ Real Estate 16.00
## 91 Biochemistry 16.67
## 92 Art Education 12.00
## 93 Philosophy 20.51
## 94 American Studies (Masters) 16.67
## 95 Business 27.78
## 96 biology 18.60
## 97 Cell Biology 11.11
## 98 Microbiology 17.78
## 99 International Affairs/Finance 9.52
## 100 International Affairs 12.24
## 101 international affairs/international finance 15.09
## 102 English and Comp Lit 19.15
## 103 Sociomedical Sciences- School of Public Health 15.38
## 104 International Business 16.67
## 105 Medical Informatics 19.44
## 106 electrical engineering 15.56
## 107 Business/Law 19.57
## 108 law 20.93
## 109 Clinical Psychology 16.00
## 110 microbiology 15.38
## 111 International Affairs and Public Health 14.00
## 112 History 17.50
## 113 Business and International Affairs (MBA/MIA Dual Degree) 13.21
## 114 Mathematics 15.38
## 115 Mathematics 16.00
## 116 business 19.61
## 117 MA Biotechnology 11.54
## 118 International Affairs/Business 13.04
## 119 Ecology 20.00
## 120 Master in Public Administration 14.29
## 121 Computational Biochemsistry 17.02
## 122 MBA 14.89
## 123 MBA 7.50
## 124 Neurobiology 21.28
## 125 History (GSAS - PhD) 17.39
## 126 International Business 25.64
## 127 Finance 20.00
## 128 biomedicine 21.43
## 129 International Affairs 23.81
## 130 International Affairs 18.60
## 131 International Affairs 17.78
## 132 Master of International Affairs 14.00
## 133 Sociology and Education 20.00
## 134 Biology 15.00
## 135 American Studies 10.00
## 136 Arts Administration 15.00
## 137 MBA 10.00
## 138 Business 90.00
## 139 biotechnology 35.00
## 140 Business 25.00
## 141 Philosophy and Physics 25.00
## 142 nutrition 16.67
## 143 Medicine 15.00
## 144 Art Education 22.00
## 145 MA Science Education 7.00
## 146 Genetics 15.00
## 147 biology 15.00
## 148 Psychology 20.00
## 149 English 15.00
## 150 Law and English Literature (J.D./Ph.D.) 15.00
## 151 GS Postbacc PreMed 30.00
## 152 Social Work 10.00
## 153 Social Work 25.00
## 154 Molecular Biology 24.00
## 155 Biology 27.00
## 156 Sociology 20.00
## 157 MBA / Master of International Affairs (SIPA) 20.00
## 158 International Affairs 20.00
## 159 MA Teaching Social Studies 10.00
## 160 Education Policy 15.00
## 161 Education- Literacy Specialist 25.00
## 162 English 10.00
## 163 Social Work 10.00
## 164 Anthropology/Education 15.00
## 165 speech pathology 50.00
## 166 Education 20.00
## 167 Speech Pathology 5.00
## 168 Elementary Education 10.00
## 169 education 20.00
## 170 Computer Science 24.00
## 171 Finance/Economics 50.00
## 172 Museum Anthropology 25.00
## 173 Business 25.00
## 174 History 20.00
## 175 Epidemiology 5.00
## 176 International Security Policy - SIPA 15.00
## 177 Nutrition 10.00
## 178 EDUCATION 30.00
## 179 EDUCATION 15.00
## 180 Organizational Psychology 25.00
## 181 Clinical Psychology 20.00
## 182 Counseling Psychology 15.00
## 183 education 15.00
## 184 MBA 20.00
## 185 Communications in Education 9.00
## 186 Intellectual Property Law 30.00
## 187 engineering 25.00
## 188 Clinical Psychology 17.00
## 189 MBA 50.00
## 190 MBA 40.00
## 191 Business 30.00
## 192 business 30.00
## 193 MBA 25.00
## 194 Business, Media 15.00
## 195 Film 25.00
## 196 Elementary Education 25.00
## 197 School Psychology 15.00
## 198 Counseling Psychology 22.00
## 199 Law 20.00
## 200 Creative Writing 35.00
## 201 Creative Writing - Nonfiction 15.00
## 202 Writing: Literary Nonfiction 8.00
## 203 Creative Writing (Nonfiction) 18.00
## 204 NonFiction Writing 20.00
## 205 Theatre Management & Producing 15.00
## 206 Business 25.00
## 207 International Affairs 30.00
## 208 Business 20.00
## 209 Finance 35.00
## 210 business 95.00
## 211 MBA 10.00
## 212 Psychology 15.00
## 213 Law 15.00
## 214 Social Work 5.00
## 215 Social Work 10.00
## 216 Social Work 20.00
## 217 Fundraising Management 30.00
## 218 Business (Finance & Marketing) 16.00
## 219 International Affairs 20.00
## 220 MBA 60.00
## 221 Business 55.00
## 222 Elementary Education - Preservice 10.00
## 223 Social Work 15.00
## 224 MFA Writing 20.00
## 225 International Affairs 30.00
## 226 Social Work 15.00
## 227 Social Work 14.00
## 228 Bilingual Education 20.00
## 229 International Affairs - Economic Policy 30.00
## 230 MBA 50.00
## 231 SIPA - Energy 40.00
## 232 Law/Business 25.00
## 233 Law 55.00
## 234 MFA Poetry 10.00
## 235 SOA -- writing 25.00
## 236 Finance 30.00
## 237 History 20.00
## 238 art history 20.00
## 239 working 10.00
## 240 Consulting 20.00
## 241 Human Rights: Middle East 15.00
## 242 medicine 10.00
## 243 biotechnology 15.00
## 244 SIPA-International Affairs 10.00
## 245 SIPA-International Affairs 20.00
## 246 International affairs 15.00
## 247 teaching of English 25.00
## 248 MBA 20.00
## 249 GSAS 16.00
## 250 physics 30.00
## 251 journalism 25.00
## 252 Theater 20.00
## 253 Engineering 20.00
## 254 biochemistry/genetics 18.00
## 255 Biology 19.00
## 256 Epidemiology 10.00
## 257 epidemiology 30.00
## 258 Biology 30.00
## 259 Computer Science 30.00
## 260 Stats 15.00
## 261 Statistics 20.00
## 262 math of finance 18.00
## 263 MFA Acting Program 20.00
## 264 sociology 20.00
## 265 Nonfiction writing 20.00
## 266 Biochemistry & Molecular Biophysics 58.00
## 267 Theater 20.00
## 268 Social Work/SIPA 25.00
## 269 Social Work 10.00
## 270 Law 10.00
## 271 medicine 15.00
## 272 Public Health 10.00
## 273 Social Work 50.00
## 274 Electrical Engineering 25.00
## 275 Biochemistry 23.00
## 276 Electrical Engineering 20.00
## 277 Electrical Engineering 20.00
## 278 Biomedical Engineering 15.00
## 279 Biomedical Informatics 20.00
## 280 Business 40.00
## sinc1_1 intel1_1 fun1_1 amb1_1 shar1_1
## 1 20.00 20.00 15.00 15.00 15.00
## 2 5.00 25.00 20.00 0.00 5.00
## 3 20.00 20.00 20.00 10.00 10.00
## 4 5.00 25.00 25.00 10.00 15.00
## 5 25.00 20.00 25.00 5.00 15.00
## 6 15.00 25.00 20.00 15.00 10.00
## 7 15.00 15.00 40.00 10.00 5.00
## 8 0.00 0.00 40.00 0.00 0.00
## 9 5.00 15.00 40.00 5.00 5.00
## 10 10.00 20.00 10.00 10.00 20.00
## 11 15.00 25.00 10.00 5.00 10.00
## 12 11.11 11.11 11.11 11.11 22.22
## 13 20.00 20.00 10.00 10.00 20.00
## 14 20.00 20.00 15.00 20.00 15.00
## 15 20.00 15.00 15.00 15.00 15.00
## 16 20.00 20.00 15.00 15.00 10.00
## 17 10.00 18.00 20.00 25.00 10.00
## 18 22.00 18.00 15.00 15.00 10.00
## 19 10.00 20.00 20.00 10.00 5.00
## 20 20.00 30.00 10.00 0.00 10.00
## 21 23.00 23.00 22.00 7.00 5.00
## 22 10.00 30.00 25.00 10.00 15.00
## 23 10.00 20.00 10.00 10.00 0.00
## 24 20.00 20.00 15.00 15.00 10.00
## 25 20.00 25.00 20.00 10.00 0.00
## 26 25.00 20.00 15.00 10.00 10.00
## 27 20.00 20.00 20.00 5.00 15.00
## 28 10.00 20.00 15.00 10.00 10.00
## 29 10.00 35.00 15.00 0.00 15.00
## 30 20.00 15.00 30.00 5.00 15.00
## 31 20.00 20.00 20.00 0.00 0.00
## 32 25.00 25.00 10.00 10.00 10.00
## 33 18.00 18.00 15.00 17.00 15.00
## 34 15.00 20.00 15.00 15.00 20.00
## 35 10.00 20.00 20.00 10.00 10.00
## 36 15.00 45.00 25.00 0.00 10.00
## 37 25.00 25.00 10.00 10.00 20.00
## 38 15.00 20.00 20.00 5.00 10.00
## 39 17.00 22.00 20.00 8.00 13.00
## 40 10.00 20.00 20.00 15.00 10.00
## 41 15.00 0.00 15.00 0.00 10.00
## 42 20.00 20.00 20.00 10.00 10.00
## 43 10.00 30.00 20.00 10.00 20.00
## 44 20.00 20.00 20.00 15.00 10.00
## 45 25.00 20.00 10.00 20.00 15.00
## 46 10.00 20.00 20.00 20.00 20.00
## 47 20.00 20.00 20.00 10.00 10.00
## 48 25.00 15.00 15.00 15.00 10.00
## 49 20.00 20.00 20.00 5.00 20.00
## 50 15.00 20.00 15.00 20.00 15.00
## 51 3.00 50.00 20.00 2.00 10.00
## 52 20.00 20.00 20.00 20.00 10.00
## 53 18.00 18.00 19.00 13.00 14.00
## 54 7.00 20.00 23.00 0.00 0.00
## 55 20.00 15.00 20.00 0.00 5.00
## 56 30.00 20.00 10.00 10.00 0.00
## 57 5.00 20.00 10.00 10.00 15.00
## 58 15.00 25.00 15.00 20.00 5.00
## 59 15.00 20.00 20.00 0.00 5.00
## 60 20.00 35.00 10.00 5.00 20.00
## 61 24.00 21.00 23.00 5.00 8.00
## 62 20.00 15.00 15.00 5.00 25.00
## 63 15.00 25.00 25.00 15.00 5.00
## 64 10.00 30.00 20.00 15.00 5.00
## 65 10.00 20.00 20.00 20.00 10.00
## 66 10.00 35.00 35.00 8.00 2.00
## 67 15.00 5.00 30.00 15.00 0.00
## 68 15.00 10.00 20.00 5.00 10.00
## 69 10.00 20.00 5.00 5.00 10.00
## 70 10.00 10.00 10.00 10.00 20.00
## 71 15.00 20.00 15.00 12.00 18.00
## 72 40.00 15.00 10.00 5.00 5.00
## 73 10.00 20.00 20.00 10.00 15.00
## 74 16.67 16.67 16.67 16.67 16.67
## 75 20.00 20.00 17.78 20.00 15.56
## 76 17.39 15.22 17.39 13.04 19.57
## 77 20.00 20.00 20.00 6.67 13.33
## 78 14.29 21.43 21.43 19.05 9.52
## 79 16.00 16.00 18.00 18.00 16.00
## 80 18.18 22.73 18.18 11.36 18.18
## 81 18.18 18.18 18.18 13.64 13.64
## 82 18.00 20.00 18.00 16.00 10.00
## 83 19.15 19.15 19.15 19.15 14.89
## 84 20.00 20.00 20.00 14.00 14.00
## 85 17.31 17.31 17.31 17.31 15.38
## 86 18.87 18.87 16.98 16.98 13.21
## 87 18.00 16.00 18.00 18.00 16.00
## 88 16.33 16.33 18.37 18.37 16.33
## 89 19.05 19.05 19.05 9.52 14.29
## 90 18.00 18.00 18.00 14.00 16.00
## 91 18.75 18.75 18.75 18.75 8.33
## 92 20.00 20.00 20.00 16.00 12.00
## 93 23.08 20.51 17.95 2.56 15.38
## 94 12.50 20.83 18.75 12.50 18.75
## 95 19.44 19.44 27.78 2.78 2.78
## 96 13.95 18.60 16.28 16.28 16.28
## 97 17.78 17.78 17.78 17.78 17.78
## 98 20.00 17.78 15.56 15.56 13.33
## 99 23.81 16.67 16.67 16.67 16.67
## 100 20.41 16.33 14.29 20.41 16.33
## 101 16.98 16.98 16.98 18.87 15.09
## 102 21.28 17.02 17.02 12.77 12.77
## 103 19.23 19.23 17.31 17.31 11.54
## 104 16.67 16.67 16.67 16.67 16.67
## 105 19.44 22.22 16.67 11.11 11.11
## 106 11.11 22.22 15.56 20.00 15.56
## 107 19.57 15.22 17.39 15.22 13.04
## 108 16.28 23.26 16.28 11.63 11.63
## 109 16.00 20.00 16.00 18.00 14.00
## 110 19.23 19.23 19.23 13.46 13.46
## 111 18.00 20.00 18.00 16.00 14.00
## 112 17.50 20.00 12.50 20.00 12.50
## 113 18.87 18.87 16.98 16.98 15.09
## 114 19.23 19.23 19.23 15.38 11.54
## 115 20.00 18.00 20.00 18.00 8.00
## 116 15.69 17.65 15.69 17.65 13.73
## 117 13.46 19.23 19.23 19.23 17.31
## 118 21.74 17.39 17.39 15.22 15.22
## 119 15.56 20.00 15.56 11.11 17.78
## 120 18.37 18.37 16.33 18.37 14.29
## 121 21.28 17.02 21.28 14.89 8.51
## 122 19.15 21.28 19.15 14.89 10.64
## 123 17.50 20.00 22.50 12.50 20.00
## 124 17.02 21.28 12.77 6.38 21.28
## 125 17.39 19.57 19.57 10.87 15.22
## 126 17.95 15.38 20.51 10.26 10.26
## 127 16.00 18.00 16.00 16.00 14.00
## 128 16.67 21.43 16.67 11.90 11.90
## 129 23.81 23.81 23.81 2.38 2.38
## 130 20.93 23.26 23.26 2.33 11.63
## 131 17.78 17.78 17.78 13.33 15.56
## 132 20.00 18.00 16.00 14.00 18.00
## 133 20.00 20.00 10.00 10.00 20.00
## 134 20.00 15.00 20.00 15.00 15.00
## 135 20.00 20.00 20.00 20.00 10.00
## 136 20.00 20.00 15.00 15.00 15.00
## 137 30.00 30.00 10.00 10.00 10.00
## 138 2.00 2.00 2.00 2.00 2.00
## 139 20.00 10.00 20.00 10.00 5.00
## 140 15.00 20.00 20.00 15.00 5.00
## 141 7.00 25.00 25.00 8.00 10.00
## 142 16.67 16.67 16.67 16.67 16.67
## 143 25.00 20.00 20.00 10.00 10.00
## 144 14.00 30.00 10.00 14.00 10.00
## 145 35.00 20.00 20.00 5.00 13.00
## 146 18.00 19.00 19.00 17.00 12.00
## 147 20.00 20.00 20.00 15.00 10.00
## 148 20.00 20.00 10.00 20.00 10.00
## 149 15.00 25.00 20.00 5.00 20.00
## 150 5.00 45.00 10.00 15.00 10.00
## 151 20.00 20.00 10.00 15.00 5.00
## 152 15.00 40.00 15.00 10.00 10.00
## 153 20.00 25.00 25.00 5.00 0.00
## 154 16.00 28.00 14.00 8.00 10.00
## 155 15.00 28.00 10.00 10.00 10.00
## 156 20.00 15.00 25.00 10.00 10.00
## 157 30.00 10.00 20.00 0.00 20.00
## 158 18.00 20.00 17.00 10.00 15.00
## 159 18.00 20.00 16.00 16.00 20.00
## 160 20.00 35.00 15.00 5.00 10.00
## 161 25.00 15.00 15.00 10.00 10.00
## 162 30.00 20.00 10.00 15.00 15.00
## 163 40.00 10.00 20.00 10.00 10.00
## 164 20.00 25.00 15.00 15.00 10.00
## 165 5.00 20.00 10.00 5.00 10.00
## 166 25.00 25.00 10.00 15.00 5.00
## 167 20.00 25.00 25.00 15.00 10.00
## 168 20.00 20.00 15.00 20.00 15.00
## 169 20.00 20.00 20.00 10.00 10.00
## 170 15.00 20.00 20.00 8.00 13.00
## 171 20.00 10.00 10.00 5.00 5.00
## 172 5.00 30.00 15.00 5.00 20.00
## 173 20.00 15.00 25.00 10.00 5.00
## 174 16.00 16.00 16.00 16.00 16.00
## 175 25.00 20.00 10.00 15.00 25.00
## 176 15.00 15.00 20.00 20.00 15.00
## 177 20.00 27.00 23.00 15.00 5.00
## 178 5.00 10.00 20.00 30.00 5.00
## 179 25.00 15.00 15.00 15.00 15.00
## 180 20.00 10.00 20.00 20.00 5.00
## 181 10.00 20.00 20.00 20.00 10.00
## 182 25.00 20.00 20.00 0.00 20.00
## 183 20.00 20.00 20.00 20.00 5.00
## 184 10.00 25.00 25.00 10.00 10.00
## 185 17.00 23.00 15.00 14.00 22.00
## 186 20.00 30.00 5.00 5.00 10.00
## 187 20.00 20.00 20.00 5.00 10.00
## 188 18.00 28.00 27.00 5.00 5.00
## 189 10.00 10.00 10.00 10.00 10.00
## 190 0.00 10.00 50.00 0.00 0.00
## 191 15.00 15.00 20.00 5.00 15.00
## 192 10.00 15.00 20.00 15.00 10.00
## 193 15.00 20.00 20.00 10.00 10.00
## 194 15.00 20.00 15.00 15.00 20.00
## 195 25.00 25.00 10.00 10.00 5.00
## 196 20.00 20.00 20.00 10.00 5.00
## 197 10.00 30.00 10.00 15.00 20.00
## 198 17.00 15.00 15.00 15.00 16.00
## 199 30.00 20.00 20.00 0.00 10.00
## 200 0.00 35.00 10.00 10.00 10.00
## 201 20.00 15.00 30.00 10.00 10.00
## 202 20.00 25.00 25.00 12.00 10.00
## 203 19.00 16.00 15.00 15.00 17.00
## 204 20.00 20.00 10.00 10.00 20.00
## 205 18.00 18.00 18.00 15.00 16.00
## 206 10.00 20.00 20.00 10.00 15.00
## 207 25.00 20.00 15.00 5.00 5.00
## 208 20.00 20.00 20.00 10.00 10.00
## 209 15.00 15.00 25.00 10.00 0.00
## 210 1.00 1.00 1.00 1.00 1.00
## 211 20.00 40.00 10.00 10.00 10.00
## 212 20.00 30.00 20.00 5.00 10.00
## 213 25.00 20.00 15.00 10.00 15.00
## 214 30.00 20.00 10.00 5.00 30.00
## 215 10.00 40.00 10.00 15.00 15.00
## 216 10.00 30.00 20.00 10.00 10.00
## 217 25.00 20.00 10.00 10.00 5.00
## 218 25.00 20.00 12.00 12.00 15.00
## 219 20.00 20.00 20.00 20.00 20.00
## 220 10.00 10.00 10.00 10.00 0.00
## 221 10.00 15.00 15.00 5.00 0.00
## 222 25.00 15.00 20.00 15.00 15.00
## 223 30.00 30.00 10.00 10.00 5.00
## 224 20.00 20.00 20.00 10.00 10.00
## 225 15.00 15.00 30.00 0.00 10.00
## 226 15.00 18.00 18.00 14.00 20.00
## 227 15.00 16.00 17.00 18.00 20.00
## 228 10.00 50.00 5.00 10.00 5.00
## 229 25.00 10.00 20.00 0.00 15.00
## 230 10.00 10.00 30.00 0.00 0.00
## 231 12.00 18.00 12.00 6.00 12.00
## 232 15.00 25.00 25.00 5.00 5.00
## 233 2.00 1.00 40.00 1.00 1.00
## 234 20.00 20.00 20.00 10.00 20.00
## 235 0.00 25.00 25.00 0.00 25.00
## 236 10.00 10.00 30.00 10.00 10.00
## 237 10.00 20.00 15.00 15.00 20.00
## 238 20.00 20.00 20.00 20.00 0.00
## 239 20.00 20.00 15.00 20.00 15.00
## 240 20.00 20.00 20.00 20.00 0.00
## 241 25.00 15.00 15.00 15.00 15.00
## 242 20.00 25.00 25.00 10.00 10.00
## 243 20.00 10.00 30.00 5.00 20.00
## 244 15.00 30.00 10.00 15.00 20.00
## 245 20.00 20.00 15.00 15.00 10.00
## 246 15.00 20.00 20.00 15.00 15.00
## 247 20.00 15.00 20.00 5.00 15.00
## 248 10.00 22.00 20.00 15.00 13.00
## 249 16.00 20.00 16.00 16.00 16.00
## 250 0.00 30.00 30.00 10.00 0.00
## 251 25.00 25.00 25.00 0.00 0.00
## 252 20.00 20.00 15.00 15.00 10.00
## 253 30.00 20.00 20.00 10.00 0.00
## 254 18.00 18.00 18.00 18.00 10.00
## 255 19.00 19.00 19.00 5.00 19.00
## 256 15.00 35.00 15.00 10.00 15.00
## 257 10.00 20.00 10.00 10.00 20.00
## 258 30.00 40.00 0.00 0.00 0.00
## 259 0.00 30.00 15.00 15.00 10.00
## 260 15.00 20.00 15.00 15.00 20.00
## 261 20.00 20.00 20.00 20.00 0.00
## 262 10.00 18.00 18.00 18.00 18.00
## 263 20.00 20.00 20.00 5.00 15.00
## 264 20.00 20.00 15.00 5.00 20.00
## 265 20.00 20.00 15.00 10.00 15.00
## 266 5.00 8.00 10.00 7.00 12.00
## 267 25.00 25.00 30.00 5.00 5.00
## 268 15.00 25.00 15.00 10.00 10.00
## 269 30.00 20.00 15.00 15.00 10.00
## 270 20.00 15.00 20.00 15.00 20.00
## 271 20.00 25.00 20.00 10.00 10.00
## 272 15.00 30.00 20.00 15.00 10.00
## 273 20.00 10.00 5.00 10.00 5.00
## 274 20.00 20.00 20.00 5.00 10.00
## 275 14.00 19.00 16.00 10.00 18.00
## 276 5.00 25.00 25.00 5.00 20.00
## 277 16.00 16.00 17.00 16.00 15.00
## 278 25.00 20.00 10.00 10.00 20.00
## 279 20.00 20.00 20.00 10.00 10.00
## 280 20.00 20.00 20.00 0.00 0.00
Separate the clean data into males and females
# Split the final data set by gender code: "0" rows go to the female set,
# "1" rows go to the male set
Speed_dating_female = subset(Speed_dating_final, subset = gender == "0")
Speed_dating_male = subset(Speed_dating_final, subset = gender == "1")
Create a bar chart of the number of male records in each match group
# Bar chart of how many rows of the male data fall into each match group
ggplot(data = Speed_dating_male, mapping = aes(x = group)) +
  geom_bar()

Create a pie chart of the percentage of matches in females
# Pie chart of the female data: slices are labelled by `group` and sized by
# `percent`
female_percent = plot_ly(
  data = Speed_dating_female,
  labels = ~group,
  values = ~percent,
  type = "pie"
) %>%
  layout(title = "Female", showlegend = TRUE)
# Display the chart
female_percent
Plot the mean score of the 6 attributes by groups in males
# Average the six attribute scores within each match group of the male data
# and give the resulting columns display names
mean_male = setNames(
  aggregate(
    x = Speed_dating_male[, c("attr1_1", "sinc1_1", "intel1_1", "fun1_1", "amb1_1", "shar1_1")],
    by = list(Speed_dating_male$group),
    FUN = mean
  ),
  nm = c("Group", "Attraction", "Sincerity", "Intelligence ", "Fun", "Ambition", "Share")
)
# Long format: one row per (Group, attribute) pair
mean_male_long = melt(mean_male, id.vars = "Group")
# Dodged bars put the groups side by side for every attribute
ggplot(mean_male_long, aes(x = variable, y = value, fill = Group)) +
  geom_col(position = "dodge") +
  labs(x = "", y = "Score", title = "Mean Scores of Different Variables in Male") +
  theme(plot.title = element_text(hjust = 0.5))

Plot the mean score of the 6 attributes by groups in females
# Average the six attribute scores within each match group of the female data
# and give the resulting columns display names
mean_female = setNames(
  aggregate(
    Speed_dating_female[, c("attr1_1", "sinc1_1", "intel1_1", "fun1_1", "amb1_1", "shar1_1")],
    by = list(Speed_dating_female$group),
    mean
  ),
  c("Group", "Attraction", "Sincerity", "Intelligence ", "Fun", "Ambition", "Share")
)
# Long format: one row per (Group, attribute) pair
mean_female_long = melt(mean_female, id.vars = "Group")
# Dodged bars put the groups side by side for every attribute.
# The title names the female data set, which is what this chart shows
ggplot(mean_female_long, aes(fill = Group, y = value, x = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  ylab("Score") +
  xlab("") +
  ggtitle("Mean Scores of Different Variables in Female") +
  theme(plot.title = element_text(hjust = 0.5))

# Reload the raw data, keeping text columns as character vectors
Speed_dating = read.csv("~/Desktop/washam2/Spooky Stats/SpeedDating.csv", na.strings = c("",".","NA"), stringsAsFactors = FALSE)
# Recode gender from its codes to labels: 0 -> "W", 1 -> "M".
# The replacement is done on the column vector with %in%, so it still works
# when the column contains NA or when one of the codes does not occur;
# assigning through a row subset of the data frame stops in both cases
Speed_dating$gender = as.character(Speed_dating$gender)
Speed_dating$gender[Speed_dating$gender %in% "0"] = "W"
Speed_dating$gender[Speed_dating$gender %in% "1"] = "M"
# Number of distinct participants (iid values) per gender: take the first
# gender value recorded for each iid, then count the iids per gender
Speed_dating %>%
  group_by(iid) %>%
  summarise(gender = gender[1]) %>%
  group_by(gender) %>%
  summarise(count = n())
## # A tibble: 2 x 2
## gender count
## <chr> <int>
## 1 M 277
## 2 W 274
# Number of participants of each gender in every wave.
# Rows are first reduced to one per distinct value of the first column
# (presumably the participant id - confirm against the CSV layout)
gender_waves = Speed_dating[!duplicated(Speed_dating[, 1]), ] %>%
  group_by(wave, gender) %>%
  summarise(count = n()) %>%
  melt(id.vars = c("gender", "wave"))
# One pair of bars per wave, coloured by gender
ggplot(gender_waves, aes(x = wave, y = value, fill = factor(gender))) +
  geom_col(position = "dodge") +
  scale_fill_discrete(name = "Gender") +
  labs(x = "Wave", y = "Population", title = "Gender repartition in waves")

# Mean age of each gender in every wave.
# Rows are first reduced to one per distinct value of the first column
# (presumably the participant id - confirm against the CSV layout) and rows
# without an age are dropped. The summary column keeps the name `count`
# although it holds a mean
age_waves = subset(Speed_dating, !duplicated(Speed_dating[, 1])) %>%
  filter(!is.na(age)) %>%
  group_by(wave, gender) %>%
  summarise(count = mean(age)) %>%
  melt(id.vars = c("gender", "wave"))
# The bars show mean ages, so the y axis is labelled accordingly
ggplot(age_waves, aes(x = wave, y = value, fill = factor(gender))) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_discrete(name = "Gender") +
  xlab("Wave") + ylab("Mean age") + ggtitle("Age repartition in waves")

# Count rows and matches for every (age, age_o) combination, then drop the
# combinations in which either age is missing
age_analysis = Speed_dating %>%
  group_by(age, age_o) %>%
  summarise(people = n(), matches = sum(match)) %>%
  filter(!(is.na(age) | is.na(age_o)))
# Keep the combinations where age is not below age_o and total the matches
# for each age difference in years.
# NOTE(review): if every date is stored once per participant, same-age pairs
# (years == 0) enter this total from both sides while all other pairs enter
# once - confirm before comparing the first bar with the rest
age_diff = age_analysis %>%
  mutate(years = age - age_o) %>%
  filter(years >= 0) %>%
  group_by(years) %>%
  summarise(matches = sum(matches)) %>%
  arrange(years)
# Graph result, restricted to differences under 20 years
ggplot(subset(age_diff, years < 20), aes(x = years, y = matches)) +
  geom_col(position = "dodge") +
  labs(
    x = "Number of years of difference between people's age",
    y = "Number of matches",
    title = "Does age really matter?"
  )

# Study field labels; position i is the label used for field code i when the
# vector is passed as axis labels for breaks 1:18
fields_cd = c(
  "Law",                                             #  1
  "Math",                                            #  2
  "Social Science, Psychologist",                    #  3
  "Medical Science, Pharmaceuticals, and Bio Tech",  #  4
  "Engineering",                                     #  5
  "English/Creative Writing/ Journalism",            #  6
  "History/Religion/Philosophy",                     #  7
  "Business/Econ/Finance",                           #  8
  "Education, Academia",                             #  9
  "Biological Sciences/Chemistry/Physics",           # 10
  "Social Work",                                     # 11
  "Undergrad/undecided",                             # 12
  "Political Science/International Affairs",         # 13
  "Film",                                            # 14
  "Fine Arts/Arts Administration",                   # 15
  "Languages",                                       # 16
  "Architecture",                                    # 17
  "Other"                                            # 18
)
# Create career codes
# Position i is the label used for career code i when the vector is passed as
# axis labels for breaks 1:17. Label 7 has its "/" separator between
# "Banking" and "Business" restored
career_cd = c(
  "Lawyer",
  "Academic/Research",
  "Psychologist",
  "Doctor/Medicine",
  "Engineer",
  "Creative Arts/Entertainment",
  "Banking/Business/CEO/Admin",
  "Real Estate",
  "International/Humanitarian Affairs",
  "Undecided",
  "Social Work",
  "Speech Pathology",
  "Politics",
  "Pro sports/Athletics",
  "Other",
  "Journalism",
  "Architecture"
)
# Find number of men/women on each study field
# (rows without a field code are left out; counts are rows, not participants)
fields = Speed_dating %>%
  filter(!is.na(field_cd)) %>%
  group_by(gender, field_cd) %>%
  summarise(count = n())
# Find number of men/women on each career
# (rows without a career code are left out; counts are rows, not participants)
careers = Speed_dating %>%
  filter(!is.na(career_c)) %>%
  group_by(gender, career_c) %>%
  summarise(count = n())
# Plot study fields repartition
# Field codes 1:18 are labelled with the entries of fields_cd; the axes are
# flipped so the labels are written horizontally
ggplot(fields, aes(x = field_cd, y = count, fill = factor(gender))) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = 1:18, labels = fields_cd) +
  scale_fill_discrete(name = "Gender") +
  labs(x = "Field", y = "Count", title = "Study fields repartition") +
  coord_flip()

# Plot careers repartition
# Career codes 1:17 are labelled with the entries of career_cd; the axes are
# flipped so the labels are written horizontally
ggplot(careers, aes(x = career_c, y = count, fill = factor(gender))) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = 1:17, labels = career_cd) +
  scale_fill_discrete(name = "Gender") +
  labs(x = "Career", y = "Count", title = "Careers repartition") +
  coord_flip()

# Race labels; position i is the label used for race code i when the vector
# is passed as axis labels for breaks 1:6.
# NOTE(review): the order follows the published key of the speed dating data
# set (1 Black, 2 European, 3 Latino, 4 Asian, 5 Native American, 6 Other) -
# confirm against the key shipped with the CSV
race_c = c(
  "Black/African American",
  "European/Caucasian-American",
  "Latino/Hispanic American",
  "Asian/Pacific Islander/Asian-American",
  "Native American",
  "Other"
)
# Find number of men/women for each race
# (rows without a race code are left out; `my` holds the row count)
races = Speed_dating %>%
  filter(!is.na(race)) %>%
  group_by(gender, race) %>%
  summarise(my = n())
# Plot race repartition
# Race codes 1:6 are labelled with the entries of race_c
ggplot(races, aes(x = race, y = my, fill = factor(gender))) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = 1:6, labels = race_c) +
  scale_fill_discrete(name = "Gender") +
  labs(x = "Race", y = "Count", title = "Race repartition") +
  coord_flip()

# Per gender, count the rows with match == 1 ("matches") and with
# match == 0 ("fails"), then reshape to one row per (gender, outcome)
match_by_gender = Speed_dating %>%
  group_by(gender) %>%
  summarise(matches = sum(match == 1), fails = sum(match == 0)) %>%
  melt(id.vars = "gender")
# Plot matches for both men and women
ggplot(match_by_gender, aes(x = variable, y = value, fill = factor(gender))) +
  geom_col(position = "dodge") +
  scale_fill_discrete(name = "Gender") +
  labs(x = "Result", y = "Count", title = "Matches by gender")

# Number of rows with match == 1 in every wave (only matched rows are kept
# before grouping, so waves without any match do not appear)
match_by_waves = Speed_dating[Speed_dating$match == 1, ] %>%
  group_by(wave) %>%
  summarise(matches = sum(match == 1))
# Plot matches for waves: what was the best wave to be?
ggplot(match_by_waves, aes(x = wave, y = matches)) +
  geom_col(position = "dodge") +
  labs(x = "Wave number", y = "Matches", title = "Matches by waves")

# Rows recorded for men
male = Speed_dating[Speed_dating$gender == "M", ]
# Index of the first column whose name contains "sports", "yoga" and "match"
first_col = head(grep("sports", names(Speed_dating)), n = 1)
last_col = head(grep("yoga", names(Speed_dating)), n = 1)
match_col = head(grep("match", names(Speed_dating)), n = 1)
# Keep only rows with no missing value in the columns first_col..last_col
male = male[complete.cases(male[first_col:last_col]), ]
# Total matches per man, then the number of men reaching each total
combined_male = male %>%
  group_by(iid) %>%
  summarise(match_sum = sum(match))
number_male = combined_male %>%
  group_by(match_sum) %>%
  summarise(count = n())
ggplot(number_male, aes(x = match_sum, y = count)) +
  geom_col(position = "dodge", colour = "black") +
  ggtitle("Number of men per number of matches") +
  xlab("Number of matches")

# Rows recorded for women
female = Speed_dating[Speed_dating$gender == "W", ]
# Index of the first column whose name contains "sports", "yoga" and "match"
first_col = head(grep("sports", names(Speed_dating)), n = 1)
last_col = head(grep("yoga", names(Speed_dating)), n = 1)
match_col = head(grep("match", names(Speed_dating)), n = 1)
# Keep only rows with no missing value in the columns first_col..last_col
female = female[complete.cases(female[first_col:last_col]), ]
# Total matches per woman, then the number of women reaching each total
combined_female = female %>%
  group_by(iid) %>%
  summarise(match_sum = sum(match))
number_female = combined_female %>%
  group_by(match_sum) %>%
  summarise(count = n())
ggplot(number_female, aes(x = match_sum, y = count)) +
  geom_col(position = "dodge", colour = "black") +
  ggtitle("Number of women per number of matches") +
  xlab("Number of matches")

# important features for men
# Keep the matched rows of `male`. The row mask is computed from `male`
# itself so that it has exactly one entry per row of `male`; which() leaves
# out rows whose match value is missing
men_matches = male[which(male$match == 1), ]
# Sum every interest column over the matched rows, then reshape to one row
# per interest (columns: gender, variable, value)
men_feature = men_matches %>%
  group_by(gender) %>%
  summarise(
    sports = sum(sports, na.rm = TRUE),
    tvsports = sum(tvsports, na.rm = TRUE),
    exercise = sum(exercise, na.rm = TRUE),
    dining = sum(dining, na.rm = TRUE),
    museums = sum(museums, na.rm = TRUE),
    art = sum(art, na.rm = TRUE),
    hiking = sum(hiking, na.rm = TRUE),
    gaming = sum(gaming, na.rm = TRUE),
    clubbing = sum(clubbing, na.rm = TRUE),
    reading = sum(reading, na.rm = TRUE),
    tv = sum(tv, na.rm = TRUE),
    music = sum(music, na.rm = TRUE),
    theater = sum(theater, na.rm = TRUE),
    movies = sum(movies, na.rm = TRUE),
    concerts = sum(concerts, na.rm = TRUE),
    shopping = sum(shopping, na.rm = TRUE),
    yoga = sum(yoga, na.rm = TRUE)
  ) %>%
  melt(id.vars = "gender")
# Bars are ordered by decreasing total
ggplot(men_feature[, c("variable", "value")], aes(x = reorder(variable, -value), y = value)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  xlab("Feature") + ylab("Count") + ggtitle("Importance of a feature for men") +
  coord_flip()

# important features for women
# Keep the matched rows of `female`. The row mask is computed from `female`
# itself so that it has exactly one entry per row of `female`; which() leaves
# out rows whose match value is missing
women_matches = female[which(female$match == 1), ]
# Sum every interest column over the matched rows, then reshape to one row
# per interest (columns: gender, variable, value)
women_feature = women_matches %>%
  group_by(gender) %>%
  summarise(
    sports = sum(sports, na.rm = TRUE),
    tvsports = sum(tvsports, na.rm = TRUE),
    exercise = sum(exercise, na.rm = TRUE),
    dining = sum(dining, na.rm = TRUE),
    museums = sum(museums, na.rm = TRUE),
    art = sum(art, na.rm = TRUE),
    hiking = sum(hiking, na.rm = TRUE),
    gaming = sum(gaming, na.rm = TRUE),
    clubbing = sum(clubbing, na.rm = TRUE),
    reading = sum(reading, na.rm = TRUE),
    tv = sum(tv, na.rm = TRUE),
    music = sum(music, na.rm = TRUE),
    theater = sum(theater, na.rm = TRUE),
    movies = sum(movies, na.rm = TRUE),
    concerts = sum(concerts, na.rm = TRUE),
    shopping = sum(shopping, na.rm = TRUE),
    yoga = sum(yoga, na.rm = TRUE)
  ) %>%
  melt(id.vars = "gender")
# Bars are ordered by decreasing total
ggplot(women_feature[, c("variable", "value")], aes(x = reorder(variable, -value), y = value)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  xlab("Feature") + ylab("Count") + ggtitle("Importance of a feature for Women") +
  coord_flip()

# One row per participant (first row seen for every iid)
Speed_dating_combined = Speed_dating[!duplicated(Speed_dating$iid), ]
# Remove commas from the zip codes, then convert them to numbers
Speed_dating_combined$zipcode = as.numeric(gsub(",", "", Speed_dating_combined$zipcode, fixed = TRUE))
# Pass the codes through clean.zipcodes() so they can be compared with the
# `zip` column of the zipcode data set loaded below
Speed_dating_zip = clean.zipcodes(Speed_dating_combined$zipcode)
data(zipcode)
us = map_data("state")
# Entries of the zipcode data set that belong to a participant
plot = zipcode[zipcode$zip %in% Speed_dating_zip, ]
# State outlines with one translucent point per matched zip code; axes, grid
# and border are removed so that only the map remains
ggplot(plot, aes(longitude, latitude)) +
  geom_polygon(data = us, aes(x = long, y = lat, group = group), alpha = .35) +
  geom_point(size = 1.3, colour = "#0072B2", alpha = .25) +
  xlim(-125, -65) +
  ylim(20, 50) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  ggtitle("Location of Participants")
## Warning: Removed 5 rows containing missing values (geom_point).

# Collect the participants whose zip code is missing, has only 1-3 digits,
# or has 6-7 digits
invalid_zipcode = rbind(
  Speed_dating_combined[is.na(Speed_dating_combined$zipcode), ],
  Speed_dating_combined[grepl("^[0-9]{1,3}$", Speed_dating_combined$zipcode), ],
  Speed_dating_combined[grepl("^[0-9]{6,7}$", Speed_dating_combined$zipcode), ]
)
# Number of participants without a zip code or with one of these lengths
# (treated here as coming from a different country)
count(invalid_zipcode)
## # A tibble: 1 x 1
## n
## <int>
## 1 128
# Print how to cite the readxl package
citation("readxl")
##
## To cite package 'readxl' in publications use:
##
## Hadley Wickham and Jennifer Bryan (2017). readxl: Read Excel
## Files. R package version 1.0.0.
## https://CRAN.R-project.org/package=readxl
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {readxl: Read Excel Files},
## author = {Hadley Wickham and Jennifer Bryan},
## year = {2017},
## note = {R package version 1.0.0},
## url = {https://CRAN.R-project.org/package=readxl},
## }
# Print how to cite the ggplot2 package
citation("ggplot2")
##
## To cite ggplot2 in publications, please use:
##
## H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
## Springer-Verlag New York, 2009.
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## author = {Hadley Wickham},
## title = {ggplot2: Elegant Graphics for Data Analysis},
## publisher = {Springer-Verlag New York},
## year = {2009},
## isbn = {978-0-387-98140-6},
## url = {http://ggplot2.org},
## }
# Print how to cite the data.table package
citation("data.table")
##
## To cite package 'data.table' in publications use:
##
## Matt Dowle and Arun Srinivasan (2017). data.table: Extension of
## `data.frame`. R package version 1.10.4.
## https://CRAN.R-project.org/package=data.table
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {data.table: Extension of `data.frame`},
## author = {Matt Dowle and Arun Srinivasan},
## year = {2017},
## note = {R package version 1.10.4},
## url = {https://CRAN.R-project.org/package=data.table},
## }
# Print how to cite the dplyr package
citation("dplyr")
##
## To cite package 'dplyr' in publications use:
##
## Hadley Wickham, Romain Francois, Lionel Henry and Kirill Müller
## (2017). dplyr: A Grammar of Data Manipulation. R package version
## 0.7.4. https://CRAN.R-project.org/package=dplyr
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {dplyr: A Grammar of Data Manipulation},
## author = {Hadley Wickham and Romain Francois and Lionel Henry and Kirill Müller},
## year = {2017},
## note = {R package version 0.7.4},
## url = {https://CRAN.R-project.org/package=dplyr},
## }
# Print how to cite the maps package
citation("maps")
##
## To cite package 'maps' in publications use:
##
## Original S code by Richard A. Becker, Allan R. Wilks. R version
## by Ray Brownrigg. Enhancements by Thomas P Minka and Alex
## Deckmyn. (2017). maps: Draw Geographical Maps. R package version
## 3.2.0. https://CRAN.R-project.org/package=maps
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {maps: Draw Geographical Maps},
## author = {Original S code by Richard A. Becker and Allan R. Wilks. R version by Ray Brownrigg. Enhancements by Thomas P Minka and Alex Deckmyn.},
## year = {2017},
## note = {R package version 3.2.0},
## url = {https://CRAN.R-project.org/package=maps},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Print how to cite the ggmap package
citation("ggmap")
##
## To cite ggmap in publications, please use:
##
## D. Kahle and H. Wickham. ggmap: Spatial Visualization with
## ggplot2. The R Journal, 5(1), 144-161. URL
## http://journal.r-project.org/archive/2013-1/kahle-wickham.pdf
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## author = {David Kahle and Hadley Wickham},
## title = {ggmap: Spatial Visualization with ggplot2},
## journal = {The R Journal},
## year = {2013},
## volume = {5},
## number = {1},
## pages = {144--161},
## url = {http://journal.r-project.org/archive/2013-1/kahle-wickham.pdf},
## }
# Print how to cite the zipcode package
citation("zipcode")
##
## To cite package 'zipcode' in publications use:
##
## Jeffrey Breen (2012). zipcode: U.S. ZIP Code database for
## geocoding. R package version 1.0.
## https://CRAN.R-project.org/package=zipcode
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {zipcode: U.S. ZIP Code database for geocoding},
## author = {Jeffrey Breen},
## year = {2012},
## note = {R package version 1.0},
## url = {https://CRAN.R-project.org/package=zipcode},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Print how to cite the reshape2 package
citation("reshape2")
##
## To cite reshape2 in publications use:
##
## Hadley Wickham (2007). Reshaping Data with the reshape Package.
## Journal of Statistical Software, 21(12), 1-20. URL
## http://www.jstatsoft.org/v21/i12/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Reshaping Data with the {reshape} Package},
## author = {Hadley Wickham},
## journal = {Journal of Statistical Software},
## year = {2007},
## volume = {21},
## number = {12},
## pages = {1--20},
## url = {http://www.jstatsoft.org/v21/i12/},
## }
# Print how to cite the shiny package
citation("shiny")
##
## To cite package 'shiny' in publications use:
##
## Winston Chang, Joe Cheng, JJ Allaire, Yihui Xie and Jonathan
## McPherson (2017). shiny: Web Application Framework for R. R
## package version 1.0.5. https://CRAN.R-project.org/package=shiny
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {shiny: Web Application Framework for R},
## author = {Winston Chang and Joe Cheng and JJ Allaire and Yihui Xie and Jonathan McPherson},
## year = {2017},
## note = {R package version 1.0.5},
## url = {https://CRAN.R-project.org/package=shiny},
## }
# Hand-written reference for a source that has no citation() entry
print("Leverger, speed-dating-experiment-r (2016), GitHub repository,https://colinleverger.github.io/speed-dating-experiment-r/")
## [1] "Leverger, speed-dating-experiment-r (2016), GitHub repository,https://colinleverger.github.io/speed-dating-experiment-r/"